Charger les paquetages nécessaires
library(dada2)
library(phyloseq)
library(DESeq2)
library(phangorn)
library(vegan)
library(ggplot2)
library(ggpubr)
library(DECIPHER)
library(car)
Charger les fichiers de lectures de séquençage brutes (READS)
# Utiliser l'arborescence suivante :
# ./
# ├── Devoir3.Rmd
# └── data/
# ├── READS_DEVOIR3/
# │ ├── ...
# │ ├── *.fastq.gz
# ├── metadata_file.txt
# ├── silva_nr_v132_train_set.fa.gz
# └── silva_species_assignment_v132.fa.gz
path_to_data <- "~/Documents/BIF-4002/Devoir3/data" # À modifier
path_to_reads <- file.path(path_to_data, "READS_DEVOIR3")
list.files(path_to_reads)
## [1] "filtered" "MCP1a_S113_L001_R1_001.fastq.gz"
## [3] "MCP1a_S113_L001_R2_001.fastq.gz" "MCP2a_S114_L001_R1_001.fastq.gz"
## [5] "MCP2a_S114_L001_R2_001.fastq.gz" "MCP4a_S116_L001_R1_001.fastq.gz"
## [7] "MCP4a_S116_L001_R2_001.fastq.gz" "MCP5a_S117_L001_R1_001.fastq.gz"
## [9] "MCP5a_S117_L001_R2_001.fastq.gz" "MWP12a_S50_L001_R1_001.fastq.gz"
## [11] "MWP12a_S50_L001_R2_001.fastq.gz" "MWP18a_S111_L001_R1_001.fastq.gz"
## [13] "MWP18a_S111_L001_R2_001.fastq.gz" "MWP19a_S112_L001_R1_001.fastq.gz"
## [15] "MWP19a_S112_L001_R2_001.fastq.gz" "MWP8a_S47_L001_R1_001.fastq.gz"
## [17] "MWP8a_S47_L001_R2_001.fastq.gz" "RCP5a_S102_L001_R1_001.fastq.gz"
## [19] "RCP5a_S102_L001_R2_001.fastq.gz" "RCP6a_S103_L001_R1_001.fastq.gz"
## [21] "RCP6a_S103_L001_R2_001.fastq.gz" "RWP6a_S120_L001_R1_001.fastq.gz"
## [23] "RWP6a_S120_L001_R2_001.fastq.gz" "RWP7a_S121_L001_R1_001.fastq.gz"
## [25] "RWP7a_S121_L001_R2_001.fastq.gz"
Définir des listes spécifiques afin de distinguer le sens des lectures
forward_filenames <- sort(list.files(path_to_reads,
pattern = "_R1_001.fastq.gz",
full.names = TRUE))
reverse_filenames <- sort(list.files(path_to_reads,
pattern = "_R2_001.fastq.gz",
full.names = TRUE))
sample.names <- sapply(strsplit(basename(forward_filenames), "_"), `[`, 1)
Visualiser la qualité des séquences
plotQualityProfile(forward_filenames)
plotQualityProfile(reverse_filenames)
Définir les noms des fichiers correspondant aux séquences filtrées
forward_filtered <- file.path(path_to_reads, "filtered",
paste0(sample.names, "_F_filt.fastq.gz"))
reverse_filtered <- file.path(path_to_reads, "filtered",
paste0(sample.names, "_R_filt.fastq.gz"))
names(forward_filtered) <- sample.names
names(reverse_filtered) <- sample.names
Effectuer le contrôle qualité des séquences
# Score phred de 20-25 comme limite pour la troncation
# Modifier le paramètre multithread en fonction de votre CPU
output_filter <- filterAndTrim(forward_filenames, forward_filtered,
reverse_filenames, reverse_filtered,
truncLen = c(290, 250), maxN = 0,
maxEE = c(2, 2), truncQ = 2, rm.phix = TRUE,
compress = TRUE, multithread = 12)
output_filter
## reads.in reads.out
## MCP1a_S113_L001_R1_001.fastq.gz 50579 32814
## MCP2a_S114_L001_R1_001.fastq.gz 60250 36479
## MCP4a_S116_L001_R1_001.fastq.gz 58966 37214
## MCP5a_S117_L001_R1_001.fastq.gz 52279 33705
## MWP12a_S50_L001_R1_001.fastq.gz 83424 42398
## MWP18a_S111_L001_R1_001.fastq.gz 48898 30913
## MWP19a_S112_L001_R1_001.fastq.gz 59907 33833
## MWP8a_S47_L001_R1_001.fastq.gz 90222 47838
## RCP5a_S102_L001_R1_001.fastq.gz 62191 38401
## RCP6a_S103_L001_R1_001.fastq.gz 65707 42460
## RWP6a_S120_L001_R1_001.fastq.gz 58513 21406
## RWP7a_S121_L001_R1_001.fastq.gz 58796 24893
# Modifier le paramètre multithread en fonction de votre CPU
forward_errors <- learnErrors(forward_filtered, multithread = 12)
reverse_errors <- learnErrors(reverse_filtered, multithread = 12)
plotErrors(forward_errors, nominalQ = TRUE)
plotErrors(reverse_errors, nominalQ = TRUE)
forward_dereplicated <- derepFastq(forward_filtered)
reverse_dereplicated <- derepFastq(reverse_filtered)
# Modifier le paramètre multithread en fonction de votre CPU
forward_dada <- dada(forward_dereplicated, err = forward_errors,
multithread = 12)
reverse_dada <- dada(reverse_dereplicated, err = reverse_errors,
multithread = 12)
Construire le tableau d’abondances
mergers <- mergePairs(forward_dada, forward_dereplicated, reverse_dada,
reverse_dereplicated, verbose = F)
seqtab <- makeSequenceTable(mergers)
freq <- table(nchar(getSequences(seqtab)))
plot(freq, type = 'l', xlab = "Longueur de séquence", ylab = "Fréquence")
Supprimer les chimères et vérification finale
# Modifier le paramètre multithread en fonction de votre CPU
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus",
multithread = 12)
freq.nochim <- table(nchar(getSequences(seqtab.nochim)))
plot(freq.nochim, type = 'l', xlab = "Longueur de séquence",
ylab = "Fréquence")
Garder les ASVs / OTUs qui ne sont pas issus d’une lecture seule
seqtab.nochim.nchar <- nchar(colnames(seqtab.nochim)) >= 270
seqtab.nochim.nosingle <- as.data.frame(seqtab.nochim)[, seqtab.nochim.nchar]
seqtab.nochim.nosingle <- as.matrix(seqtab.nochim.nosingle)
Vérifier le nombre de reads à chaque étape
getN <- function(x) sum(getUniques(x))
track <- cbind(output_filter, sapply(forward_dada, getN),
sapply(reverse_dada, getN), sapply(mergers, getN),
rowSums(seqtab.nochim), rowSums(seqtab.nochim.nosingle))
colnames(track) <- c("initial", "post-filtration", "denoisedF", "denoisedR",
"merged", "sans-chimères",
"sans-chimères-sans-lecture-seule")
rownames(track) <- sample.names
# Modifier le paramètre multithread en fonction de votre CPU
taxonomy <- assignTaxonomy(seqtab.nochim.nosingle,
file.path(path_to_data,
"silva_nr_v132_train_set.fa.gz"),
multithread = 12)
taxonomy <- addSpecies(taxonomy,
file.path(path_to_data,
"silva_species_assignment_v132.fa.gz"),
allowMultiple = TRUE, n = 1e3)
Construire l’objet Phyloseq
metadata_df <- read.delim(file.path(path_to_data, "metadata_file.txt"),
sep = "\t", header = TRUE, row.names = 1)
metadata_df <- metadata_df[rownames(metadata_df) %in% sample.names, ]
my_phyloseq <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows = FALSE),
sample_data(metadata_df), tax_table(taxonomy))
Constuire l’arbre phylogénétique des séquences d’ASVs
dna_seqs <- Biostrings::DNAStringSet(taxa_names(my_phyloseq))
names(dna_seqs) <- taxa_names(my_phyloseq)
aligned_dna_seqs <- AlignSeqs(dna_seqs, verbose = FALSE)
dna_phyDat <- as.phyDat(as.DNAbin(aligned_dna_seqs))
phy_tree(my_phyloseq) <- NJ(dist.ml(dna_phyDat))
Renommer les séquences associées
my_phyloseq <- merge_phyloseq(my_phyloseq, dna_seqs)
taxa_names(my_phyloseq) <- paste0("ASV", seq(ntaxa(my_phyloseq)))
Normaliser les abondances
my_phyloseq.relative <- transform_sample_counts(my_phyloseq,
function(OTU) OTU / sum(OTU))
top30 <- names(sort(taxa_sums(my_phyloseq), decreasing = TRUE))[1:30]
my_phyloseq.top30 <- prune_taxa(top30, my_phyloseq.relative)
Visualiser par ordre
plot_bar(my_phyloseq.top30, fill = "Order") +
facet_wrap(~ Population, scales = "free_x")
Visualiser par famille
plot_bar(my_phyloseq.top30, fill = "Family") +
facet_wrap(~ Population, scales = "free_x")
Visualiser par genre
plot_bar(my_phyloseq.top30, fill = "Genus") +
facet_wrap(~ Population, scales = "free_x")
Répartir les échantillons par population
MC_phyloseq <- subset_samples(my_phyloseq.relative,
Population == "Malbaie_Captive")
MW_phyloseq <- subset_samples(my_phyloseq.relative,
Population == "Malbaie_Wild")
RC_phyloseq <- subset_samples(my_phyloseq.relative,
Population == "Rimouski_Captive")
RW_phyloseq <- subset_samples(my_phyloseq.relative,
Population == "Rimouski_Wild")
top20.MC <- names(sort(taxa_sums(MC_phyloseq), decreasing = TRUE))[1:20]
top20.MW <- names(sort(taxa_sums(MW_phyloseq), decreasing = TRUE))[1:20]
top20.RC <- names(sort(taxa_sums(RC_phyloseq), decreasing = TRUE))[1:20]
top20.RW <- names(sort(taxa_sums(RW_phyloseq), decreasing = TRUE))[1:20]
MC.top20 <- prune_taxa(top20.MC, MC_phyloseq)
MW.top20 <- prune_taxa(top20.MW, MW_phyloseq)
RC.top20 <- prune_taxa(top20.RC, RC_phyloseq)
RW.top20 <- prune_taxa(top20.RW, RW_phyloseq)
Visualiser par ordre
MC.barplot_order <- plot_bar(MC.top20, fill = "Order",
title = "Malbaie_Captive") +
scale_y_continuous(limits = c(0, 1))
MW.barplot_order <- plot_bar(MW.top20, fill = "Order",
title = "Malbaie_Wild") +
scale_y_continuous(limits = c(0, 1))
RC.barplot_order <- plot_bar(RC.top20, fill = "Order",
title = "Rimouski_Captive") +
scale_y_continuous(limits = c(0, 1))
RW.barplot_order <- plot_bar(RW.top20, fill = "Order",
title = "Rimouski_Wild") +
scale_y_continuous(limits = c(0, 1))
ggarrange(MC.barplot_order, MW.barplot_order, RC.barplot_order,
RW.barplot_order, labels = c("A", "B","C", "D"), ncol = 2, nrow = 2)
Visualiser par famille
MC.barplot_family <- plot_bar(MC.top20, fill = "Family",
title = "Malbaie_Captive") +
scale_y_continuous(limits = c(0, 1))
MW.barplot_family <- plot_bar(MW.top20, fill = "Family",
title = "Malbaie_Wild") +
scale_y_continuous(limits = c(0, 1))
RC.barplot_family <- plot_bar(RC.top20, fill = "Family",
title = "Rimouski_Captive") +
scale_y_continuous(limits = c(0, 1))
RW.barplot_family <- plot_bar(RW.top20, fill = "Family",
title = "Rimouski_Wild") +
scale_y_continuous(limits = c(0, 1))
ggarrange(MC.barplot_family, MW.barplot_family, RC.barplot_family,
RW.barplot_family, labels = c("A", "B","C", "D"), ncol = 2, nrow = 2)
Visualiser par genre
MC.barplot_genus <- plot_bar(MC.top20, fill = "Genus",
title = "Malbaie_Captive") +
scale_y_continuous(limits = c(0, 1))
MW.barplot_genus <- plot_bar(MW.top20, fill = "Genus",
title = "Malbaie_Wild") +
scale_y_continuous(limits = c(0, 1))
RC.barplot_genus <- plot_bar(RC.top20, fill = "Genus",
title = "Rimouski_Captive") +
scale_y_continuous(limits = c(0, 1))
RW.barplot_genus <- plot_bar(RW.top20, fill = "Genus",
title = "Rimouski_Wild") +
scale_y_continuous(limits = c(0, 1))
ggarrange(MC.barplot_genus, MW.barplot_genus, RC.barplot_genus,
RW.barplot_genus, labels = c("A", "B","C", "D"), ncol = 2, nrow = 2)
plot_richness(my_phyloseq, x = "Population",
measures = c("Shannon", "Simpson", "InvSimpson", "Chao1"),
color = "Population", title = "Graphiques de diversité alpha")
Chercher les valeurs de diversité alpha et ajouter les métadonnées
test_names <- c("Chao1", "Shannon", "InvSimpson", "Fisher")
alpha_data <- estimate_richness(my_phyloseq, measures = test_names)
alpha_data <- merge(x = alpha_data, y = metadata_df, by = "row.names")
rownames(alpha_data) <- alpha_data$Row.names
Test de Shapiro-Wilk (normalité)
shapiro.test(alpha_data$Chao1)
##
## Shapiro-Wilk normality test
##
## data: alpha_data$Chao1
## W = 0.82192, p-value = 0.01681
shapiro.test(alpha_data$Shannon)
##
## Shapiro-Wilk normality test
##
## data: alpha_data$Shannon
## W = 0.78564, p-value = 0.006462
shapiro.test(alpha_data$InvSimpson)
##
## Shapiro-Wilk normality test
##
## data: alpha_data$InvSimpson
## W = 0.79131, p-value = 0.007475
shapiro.test(alpha_data$Fisher)
##
## Shapiro-Wilk normality test
##
## data: alpha_data$Fisher
## W = 0.76709, p-value = 0.004055
Test de Levene (homogénéité des variances)
for (test in test_names)
{
test_formula <- paste(test, "~", "Population")
print(paste("Test de l'homogénéité de la variance :", test))
print(leveneTest(as.formula(test_formula), data = alpha_data))
}
## [1] "Test de l'homogénéité de la variance : Chao1"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 18.172 0.0006252 ***
## 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "Test de l'homogénéité de la variance : Shannon"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 21.974 0.0003226 ***
## 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "Test de l'homogénéité de la variance : InvSimpson"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 38.784 4.106e-05 ***
## 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "Test de l'homogénéité de la variance : Fisher"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 37.075 4.853e-05 ***
## 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ANOVA (si les deux postulats sont respectés)
for (test in test_names)
{
test_formula <- paste(test, "~", "Population")
aov.Type <- aov(formula = as.formula(test_formula), data = alpha_data)
print(paste("Mesure de diversité alpha :", test))
print(summary(aov.Type))
print(TukeyHSD(aov.Type))
}
## [1] "Mesure de diversité alpha : Chao1"
## Df Sum Sq Mean Sq F value Pr(>F)
## Population 3 24476 8159 4.641 0.0367 *
## Residuals 8 14062 1758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = as.formula(test_formula), data = alpha_data)
##
## $Population
## diff lwr upr p adj
## Malbaie_Wild-Malbaie_Captive -35.625 -130.56200 59.3120033 0.6426946
## Rimouski_Captive-Malbaie_Captive 12.375 -103.89861 128.6486079 0.9853836
## Rimouski_Wild-Malbaie_Captive -122.625 -238.89861 -6.3513921 0.0391218
## Rimouski_Captive-Malbaie_Wild 48.000 -68.27361 164.2736079 0.5755454
## Rimouski_Wild-Malbaie_Wild -87.000 -203.27361 29.2736079 0.1552251
## Rimouski_Wild-Rimouski_Captive -135.000 -269.26120 -0.7388023 0.0487759
##
## [1] "Mesure de diversité alpha : Shannon"
## Df Sum Sq Mean Sq F value Pr(>F)
## Population 3 9.586 3.195 8.321 0.00766 **
## Residuals 8 3.072 0.384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = as.formula(test_formula), data = alpha_data)
##
## $Population
## diff lwr upr p adj
## Malbaie_Wild-Malbaie_Captive -0.6461889 -2.049451 0.7570734 0.4932129
## Rimouski_Captive-Malbaie_Captive -0.3130941 -2.031732 1.4055443 0.9343293
## Rimouski_Wild-Malbaie_Captive -2.6124897 -4.331128 -0.8938513 0.0054387
## Rimouski_Captive-Malbaie_Wild 0.3330949 -1.385543 2.0517332 0.9225955
## Rimouski_Wild-Malbaie_Wild -1.9663007 -3.684939 -0.2476624 0.0262929
## Rimouski_Wild-Rimouski_Captive -2.2993956 -4.283908 -0.3148830 0.0246620
##
## [1] "Mesure de diversité alpha : InvSimpson"
## Df Sum Sq Mean Sq F value Pr(>F)
## Population 3 4277 1425.8 2.956 0.0979 .
## Residuals 8 3858 482.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = as.formula(test_formula), data = alpha_data)
##
## $Population
## diff lwr upr p adj
## Malbaie_Wild-Malbaie_Captive -21.965911 -71.69576 27.76394 0.5250901
## Rimouski_Captive-Malbaie_Captive -20.414369 -81.32075 40.49201 0.7141378
## Rimouski_Wild-Malbaie_Captive -56.482060 -117.38844 4.42432 0.0694205
## Rimouski_Captive-Malbaie_Wild 1.551542 -59.35484 62.45792 0.9997881
## Rimouski_Wild-Malbaie_Wild -34.516149 -95.42253 26.39023 0.3335828
## Rimouski_Wild-Rimouski_Captive -36.067691 -106.39632 34.26094 0.4096768
##
## [1] "Mesure de diversité alpha : Fisher"
## Df Sum Sq Mean Sq F value Pr(>F)
## Population 3 669.3 223.09 4.87 0.0326 *
## Residuals 8 366.5 45.81
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = as.formula(test_formula), data = alpha_data)
##
## $Population
## diff lwr upr p adj
## Malbaie_Wild-Malbaie_Captive -7.4373394 -22.76303 7.8883531 0.4525851
## Rimouski_Captive-Malbaie_Captive 0.7080254 -18.06204 19.4780887 0.9993148
## Rimouski_Wild-Malbaie_Captive -20.6199820 -39.39005 -1.8499187 0.0321614
## Rimouski_Captive-Malbaie_Wild 8.1453648 -10.62470 26.9154282 0.5384752
## Rimouski_Wild-Malbaie_Wild -13.1826426 -31.95271 5.5874208 0.1897851
## Rimouski_Wild-Rimouski_Captive -21.3280074 -43.00181 0.3457948 0.0537295
Test de Kruskal-Wallis (non paramétrique)
for (test in test_names)
{
test_formula <- paste(test, "~", "Population")
print(paste("Mesure de diversité alpha :", test))
print(kruskal.test(as.formula(test_formula), data = alpha_data))
}
## [1] "Mesure de diversité alpha : Chao1"
##
## Kruskal-Wallis rank sum test
##
## data: Chao1 by Population
## Kruskal-Wallis chi-squared = 5.8269, df = 3, p-value = 0.1203
##
## [1] "Mesure de diversité alpha : Shannon"
##
## Kruskal-Wallis rank sum test
##
## data: Shannon by Population
## Kruskal-Wallis chi-squared = 4.75, df = 3, p-value = 0.191
##
## [1] "Mesure de diversité alpha : InvSimpson"
##
## Kruskal-Wallis rank sum test
##
## data: InvSimpson by Population
## Kruskal-Wallis chi-squared = 4.9615, df = 3, p-value = 0.1746
##
## [1] "Mesure de diversité alpha : Fisher"
##
## Kruskal-Wallis rank sum test
##
## data: Fisher by Population
## Kruskal-Wallis chi-squared = 5.4231, df = 3, p-value = 0.1433
Test de Wilcoxon (non paramétrique)
for (test in test_names)
{
test_formula <- paste(test, "~", "Population")
print(paste("Mesure de diversité alpha :", test))
print(pairwise.wilcox.test(alpha_data[, test], alpha_data$Population,
p.adjust.method = "fdr"))
}
## [1] "Mesure de diversité alpha : Chao1"
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: alpha_data[, test] and alpha_data$Population
##
## Malbaie_Captive Malbaie_Wild Rimouski_Captive
## Malbaie_Wild 0.89 - -
## Rimouski_Captive 0.50 0.64 -
## Rimouski_Wild 0.40 0.40 0.50
##
## P value adjustment method: fdr
## [1] "Mesure de diversité alpha : Shannon"
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: alpha_data[, test] and alpha_data$Population
##
## Malbaie_Captive Malbaie_Wild Rimouski_Captive
## Malbaie_Wild 1.00 - -
## Rimouski_Captive 1.00 1.00 -
## Rimouski_Wild 0.40 0.40 0.67
##
## P value adjustment method: fdr
## [1] "Mesure de diversité alpha : InvSimpson"
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: alpha_data[, test] and alpha_data$Population
##
## Malbaie_Captive Malbaie_Wild Rimouski_Captive
## Malbaie_Wild 0.80 - -
## Rimouski_Captive 0.80 0.80 -
## Rimouski_Wild 0.40 0.40 0.67
##
## P value adjustment method: fdr
## [1] "Mesure de diversité alpha : Fisher"
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: alpha_data[, test] and alpha_data$Population
##
## Malbaie_Captive Malbaie_Wild Rimouski_Captive
## Malbaie_Wild 0.89 - -
## Rimouski_Captive 0.64 0.64 -
## Rimouski_Wild 0.40 0.40 0.64
##
## P value adjustment method: fdr
NMDS
ordination.nmds.bray <- ordinate(my_phyloseq.relative, method = "NMDS",
distance = "bray")
ordination.nmds.unifrac <- ordinate(my_phyloseq.relative, method = "NMDS",
distance = "unifrac")
ordination.nmds.wunifrac <- ordinate(my_phyloseq.relative, method = "NMDS",
distance = "unifrac", weighted = TRUE)
NMDS.bray.plot <- plot_ordination(my_phyloseq.relative, ordination.nmds.bray,
color = "Population",
title = "Bray NMDS by sample") +
stat_ellipse(level = 0.95)
NMDS.UniFrac.plot <- plot_ordination(my_phyloseq.relative,
ordination.nmds.unifrac,
color = "Population",
title = "Unifrac NMDS by sample") +
stat_ellipse(level = 0.95)
NMDS.WUniFrac.plot <- plot_ordination(my_phyloseq.relative,
ordination.nmds.wunifrac,
color = "Population",
title = "Weighted Unifrac NMDS by
sample") +
stat_ellipse(level = 0.95)
ggarrange(NMDS.bray.plot , ggarrange(NMDS.UniFrac.plot, NMDS.WUniFrac.plot,
ncol = 2),
labels = c("A", "B"), nrow = 2)
PCoA
ordination.pcoa.bray <- ordinate(my_phyloseq.relative, method = "PCoA",
distance = "bray")
ordination.pcoa.unifrac = ordinate(my_phyloseq.relative, method = "PCoA",
distance = "unifrac")
ordination.pcoa.wunifrac = ordinate(my_phyloseq.relative, method = "PCoA",
distance = "unifrac", weighted = TRUE)
PCOA.bray.plot <- plot_ordination(my_phyloseq.relative, ordination.pcoa.bray,
color = "Population",
title = "Bray PCoA by sample") +
stat_ellipse(level = 0.95)
PCOA.UniFrac.plot <- plot_ordination(my_phyloseq.relative,
ordination.pcoa.unifrac,
color = "Population",
title = "Unifrac PCoA by sample") +
stat_ellipse(level = 0.95)
PCOA.WUniFrac.plot <- plot_ordination(my_phyloseq.relative,
ordination.pcoa.wunifrac,
color = "Population",
title = "Weighted Unifrac PCoA by
sample") +
stat_ellipse(level = 0.95)
ggarrange(PCOA.bray.plot , ggarrange(PCOA.UniFrac.plot, PCOA.WUniFrac.plot,
ncol = 2, common.legend = TRUE,
legend = "right"),
labels = c("A", "B"), nrow = 2, legend = "right")
Extraire les matrices de diversité bêta
distance.matrix.unifrac <- UniFrac(my_phyloseq.relative)
distance.matrix.wunifrac <- UniFrac(my_phyloseq.relative, weighted = TRUE)
distance.matrix.bray <- phyloseq::distance(my_phyloseq.relative,
method = "bray")
ADONIS
adonis(distance.matrix.unifrac ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 1.8973 0.63244 2.6886 0.50205 0.000999 ***
## Residuals 8 1.8818 0.23523 0.49795
## Total 11 3.7791 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.unifrac ~ Population, data = metadata_df,
## permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 0.72342373 0.7165099 0.7083156 0.72814143 0.805624369
## Population1 -0.26921534 -0.2812108 -0.2560291 -0.24780689 0.006185426
## Population2 0.13892764 0.1530855 0.1421215 0.11308513 -0.176605790
## Population3 -0.09812944 -0.1102265 -0.1264871 -0.09334202 0.018127541
## [,6] [,7] [,8] [,9] [,10]
## (Intercept) 0.85195245 0.78850827 0.82025148 0.68669190 0.67795200
## Population1 0.04074275 0.02750523 0.08284032 -0.06230201 -0.07823912
## Population2 -0.19404991 -0.15787917 -0.17511736 0.17078136 0.17396812
## Population3 0.03646801 0.02122863 0.07662601 -0.38291532 -0.37417541
## [,11] [,12]
## (Intercept) 0.7428868 0.7435518
## Population1 0.2098331 0.2095373
## Population2 0.1711876 0.1725062
## Population3 0.2157227 0.2153650
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.unifrac ~ Population
## attr(,"variables")
## list(distance.matrix.unifrac, Population)
## attr(,"factors")
## Population
## distance.matrix.unifrac 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
adonis(distance.matrix.wunifrac ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 0.88050 0.293499 4.7114 0.63857 0.000999 ***
## Residuals 8 0.49836 0.062295 0.36143
## Total 11 1.37886 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.wunifrac ~ Population, data = metadata_df,
## permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 0.43039214 0.4227292 0.4395561 0.42843251 0.44486089
## Population1 -0.23399739 -0.2747719 -0.2720918 -0.22335543 0.09502242
## Population2 0.12154394 0.1554981 0.1573792 0.10995236 -0.15824106
## Population3 -0.08514333 -0.1135847 -0.1260422 -0.07704848 0.05868013
## [,6] [,7] [,8] [,9] [,10]
## (Intercept) 0.45876831 0.50202015 0.44385513 0.4276048 0.4056607
## Population1 0.08707781 0.03199058 0.20188832 0.0349019 -0.2085217
## Population2 -0.15833137 -0.11627192 -0.08627177 0.0758539 0.1539116
## Population3 0.05269271 0.02610696 0.13907776 -0.2057735 -0.1838294
## [,11] [,12]
## (Intercept) 0.412176497 0.412213211
## Population1 0.233458206 0.233537965
## Population2 0.006876081 0.006776164
## Population3 0.171138087 0.171194960
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.wunifrac ~ Population
## attr(,"variables")
## list(distance.matrix.wunifrac, Population)
## attr(,"factors")
## Population
## distance.matrix.wunifrac 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
adonis(distance.matrix.bray ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 2.5707 0.85689 3.6923 0.58064 0.000999 ***
## Residuals 8 1.8566 0.23208 0.41936
## Total 11 4.4273 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.bray ~ Population, data = metadata_df,
## permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5] [,6]
## (Intercept) 0.7325407 0.6966602 0.6954877 0.7144865 0.91679199 0.92878082
## Population1 -0.3818396 -0.4205644 -0.4147734 -0.4039750 0.06581757 0.06854613
## Population2 0.2600739 0.2977806 0.2949963 0.2750036 -0.21831736 -0.21004005
## Population3 -0.1417756 -0.1805560 -0.1837578 -0.1565421 0.06929178 0.07027474
## [,7] [,8] [,9] [,10] [,11] [,12]
## (Intercept) 0.92871477 0.92752245 0.75505975 0.6817376 0.7555053 0.7555053
## Population1 0.05923083 0.07162487 -0.06628994 -0.2822356 0.2432708 0.2432708
## Population2 -0.18917125 -0.21514578 0.23971944 0.3090208 0.2444947 0.2444947
## Population3 0.05865519 0.07104336 -0.41836975 -0.3450476 0.2444947 0.2444947
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.bray ~ Population
## attr(,"variables")
## list(distance.matrix.bray, Population)
## attr(,"factors")
## Population
## distance.matrix.bray 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
Extraire les 100 ASVs les plus abondants
top100 <- names(sort(taxa_sums(my_phyloseq.relative), decreasing = TRUE))[1:100]
my_phyloseq.top100 <- prune_taxa(top100, my_phyloseq.relative)
NMDS
ordination.nmds.bray.top100 <- ordinate(my_phyloseq.top100, method = "NMDS",
distance = "bray")
ordination.nmds.unifrac.top100 <- ordinate(my_phyloseq.top100, method = "NMDS",
distance = "unifrac")
ordination.nmds.wunifrac.top100 <- ordinate(my_phyloseq.top100, method = "NMDS",
distance = "unifrac",
weighted = TRUE)
NMDS.bray.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.nmds.bray,
color = "Population",
title = "Bray NMDS by sample") +
stat_ellipse(level = 0.95)
NMDS.UniFrac.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.nmds.unifrac,
color = "Population",
title = "Unifrac NMDS by sample") +
stat_ellipse(level = 0.95)
NMDS.WUniFrac.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.nmds.wunifrac,
color = "Population",
title = "Weighted Unifrac NMDS by
sample") +
stat_ellipse(level = 0.95)
ggarrange(NMDS.bray.plot.top100, ggarrange(NMDS.UniFrac.plot.top100,
NMDS.WUniFrac.plot.top100, ncol = 2),
labels = c("A", "B"), nrow = 2)
PCoA
ordination.pcoa.bray.top100 <- ordinate(my_phyloseq.top100, method = "PCoA",
distance = "bray")
ordination.pcoa.unifrac.top100 <- ordinate(my_phyloseq.top100, method = "PCoA",
distance = "unifrac")
ordination.pcoa.wunifrac.top100 <- ordinate(my_phyloseq.top100, method = "PCoA",
distance = "unifrac",
weighted = TRUE)
PCOA.bray.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.pcoa.bray,
color = "Population",
title = "Bray PCoA by sample") +
stat_ellipse(level = 0.95)
PCOA.UniFrac.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.pcoa.unifrac,
color = "Population",
title = "Unifrac PCoA by sample") +
stat_ellipse(level = 0.95)
PCOA.WUniFrac.plot.top100 <- plot_ordination(my_phyloseq.top100,
ordination.pcoa.wunifrac,
color = "Population",
title = "Weighted Unifrac PCoA by
sample") +
stat_ellipse(level = 0.95)
ggarrange(PCOA.bray.plot.top100, ggarrange(PCOA.UniFrac.plot.top100,
PCOA.WUniFrac.plot.top100, ncol = 2,
common.legend = TRUE,
legend = "right"),
labels = c("A", "B"), nrow = 2, legend = "right")
Extraire les matrices de diversité bêta
distance.matrix.unifrac.top100 <- UniFrac(my_phyloseq.top100)
distance.matrix.wunifrac.top100 <- UniFrac(my_phyloseq.top100, weighted = TRUE)
distance.matrix.bray.top100 <- phyloseq::distance(my_phyloseq.top100,
method = "bray")
ADONIS
adonis(distance.matrix.unifrac.top100 ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 1.68496 0.56165 7.4758 0.73708 0.000999 ***
## Residuals 8 0.60103 0.07513 0.26292
## Total 11 2.28599 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.unifrac.top100 ~ Population,
## data = metadata_df, permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5] [,6]
## (Intercept) 0.4549049 0.4868563 0.4717861 0.4620570 0.60436989 0.6332321
## Population1 -0.2978475 -0.3178382 -0.2871909 -0.3196743 0.09857812 0.1656151
## Population2 0.3395123 0.2901657 0.2365505 0.3092715 -0.25622549 -0.2603932
## Population3 -0.3682345 -0.2659106 -0.1690989 -0.3464993 0.13548069 0.2016356
## [,7] [,8] [,9] [,10] [,11] [,12]
## (Intercept) 0.68727397 0.6380183 0.4782833 0.4476370 0.5433057 0.5433057
## Population1 0.04229701 0.1817198 -0.2535989 -0.3093910 0.2247939 0.2247939
## Population2 -0.15213170 -0.2098484 0.3280012 0.3360917 0.0407321 0.0407321
## Population3 0.06140737 0.2186083 -0.4239998 -0.3933536 0.2777797 0.2777797
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.unifrac.top100 ~ Population
## attr(,"variables")
## list(distance.matrix.unifrac.top100, Population)
## attr(,"factors")
## Population
## distance.matrix.unifrac.top100 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
adonis(distance.matrix.wunifrac.top100 ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 1.38968 0.46323 5.4737 0.67242 0.000999 ***
## Residuals 8 0.67702 0.08463 0.32758
## Total 11 2.06669 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.wunifrac.top100 ~ Population,
## data = metadata_df, permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5] [,6]
## (Intercept) 0.53396030 0.5280003 0.5330731 0.53386148 0.5179711 0.5173437
## Population1 -0.31048006 -0.3667180 -0.3595128 -0.29829716 0.1623279 0.1735411
## Population2 0.16659954 0.2187462 0.2266434 0.15915025 -0.2352939 -0.2370884
## Population3 -0.08410979 -0.1273152 -0.1550752 -0.07728042 0.1258635 0.1275986
## [,7] [,8] [,9] [,10] [,11]
## (Intercept) 0.61275243 0.5185493 0.52495413 0.5321491 0.47718915
## Population1 0.11796187 0.2795871 0.10561788 -0.3201638 0.30715233
## Population2 -0.18035533 -0.1467733 0.05331753 0.2329286 -0.04897442
## Population3 0.07537625 0.1912439 -0.19853309 -0.2057280 0.21789157
## [,12]
## (Intercept) 0.47693314
## Population1 0.30699756
## Population2 -0.04883428
## Population3 0.21765019
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.wunifrac.top100 ~ Population
## attr(,"variables")
## list(distance.matrix.wunifrac.top100, Population)
## attr(,"factors")
## Population
## distance.matrix.wunifrac.top100 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
adonis(distance.matrix.bray.top100 ~ Population, data = metadata_df,
permutations = 1000)[-5]
## $aov.tab
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Population 3 2.6841 0.89471 4.5011 0.62796 0.000999 ***
## Residuals 8 1.5902 0.19878 0.37204
## Total 11 4.2744 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $call
## adonis(formula = distance.matrix.bray.top100 ~ Population, data = metadata_df,
## permutations = 1000)
##
## $coefficients
## NULL
##
## $coef.sites
## [,1] [,2] [,3] [,4] [,5] [,6]
## (Intercept) 0.6609068 0.6336865 0.6302456 0.6427188 0.90461964 0.92329485
## Population1 -0.4513137 -0.4902623 -0.4824304 -0.4693451 0.06818212 0.07670515
## Population2 0.3276275 0.3626594 0.3603811 0.3416693 -0.24372108 -0.23011544
## Population3 -0.2103677 -0.2387106 -0.2465416 -0.2296055 0.08015861 0.07670515
## [,7] [,8] [,9] [,10] [,11] [,12]
## (Intercept) 0.93007219 0.92593824 0.7342166 0.6219184 0.7537606 0.7537583
## Population1 0.05702100 0.07406176 -0.1015929 -0.4333761 0.2446934 0.2446863
## Population2 -0.18523248 -0.22218529 0.2616226 0.3688094 0.2462394 0.2462417
## Population3 0.05828367 0.07406176 -0.4258131 -0.3135149 0.2462394 0.2462417
##
## $model.matrix
## (Intercept) Population1 Population2 Population3
## MCP1a 1 1 0 0
## MCP2a 1 1 0 0
## MCP4a 1 1 0 0
## MCP5a 1 1 0 0
## MWP12a 1 0 1 0
## MWP18a 1 0 1 0
## MWP19a 1 0 1 0
## MWP8a 1 0 1 0
## RCP5a 1 0 0 1
## RCP6a 1 0 0 1
## RWP6a 1 -1 -1 -1
## RWP7a 1 -1 -1 -1
##
## $terms
## distance.matrix.bray.top100 ~ Population
## attr(,"variables")
## list(distance.matrix.bray.top100, Population)
## attr(,"factors")
## Population
## distance.matrix.bray.top100 0
## Population 1
## attr(,"term.labels")
## [1] "Population"
## attr(,"order")
## [1] 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>